# ---
# title: "How_I_learned_to_hate_you_two_years_all_months"
# author: "///////"
# date: '2022-08-10'
#
#
# R version 4.1.3 (2022-03-10) -- "One Push-Up"
# tidyverse 1.3.1
# psych_2.1.6
# writexl_1.4.0
# ---
  
  
library("tidyverse")

# importing & renaming columns with blank spaces for CrowdTangle datasets from
# February 2020 to January 2022 (COVID; Romanian)

# Read one CSV export and repair its column names.
#
# path: path of the CSV file to read with read_csv().
# Returns: the imported tibble, with names passed through
#   make.names(unique = TRUE) so that headers containing blanks become
#   syntactically valid, unique R names (a header such as
#   "Total Interactions" would become "Total.Interactions").
read_crowdtangle <- function(path) {
  posts <- read_csv(path)
  names(posts) <- make.names(names(posts), unique = TRUE)
  posts
}

# One export per month; each data set is printed after import for a quick
# visual check.

COVID_02_2020 <- read_crowdtangle(
  "2022-08-06-11-18-12-CEST-search-csv-export.csv"
)
COVID_02_2020

COVID_03_2020 <- read_crowdtangle(
  "2022-08-06-11-19-32-CEST-search-csv-export.csv"
)
COVID_03_2020

COVID_04_2020 <- read_crowdtangle(
  "2022-08-06-11-19-35-CEST-search-csv-export.csv"
)
COVID_04_2020

COVID_05_2020 <- read_crowdtangle(
  "2022-08-06-11-20-09-CEST-search-csv-export.csv"
)
COVID_05_2020

COVID_06_2020 <- read_crowdtangle(
  "2022-08-06-11-20-06-CEST-search-csv-export.csv"
)
COVID_06_2020

COVID_07_2020 <- read_crowdtangle(
  "2022-08-06-11-21-12-CEST-search-csv-export.csv"
)
COVID_07_2020

COVID_08_2020 <- read_crowdtangle(
  "2022-08-06-11-33-21-CEST-search-csv-export.csv"
)
COVID_08_2020

COVID_09_2020 <- read_crowdtangle(
  "2022-08-06-11-21-36-CEST-search-csv-export.csv"
)
COVID_09_2020

COVID_10_2020 <- read_crowdtangle(
  "2022-08-06-11-21-46-CEST-search-csv-export.csv"
)
COVID_10_2020

COVID_11_2020 <- read_crowdtangle(
  "2022-08-06-11-22-29-CEST-search-csv-export.csv"
)
COVID_11_2020

COVID_12_2020 <- read_crowdtangle(
  "2022-08-06-11-22-32-CEST-search-csv-export.csv"
)
COVID_12_2020

COVID_01_2021 <- read_crowdtangle(
  "2022-08-06-11-22-49-CEST-search-csv-export.csv"
)
COVID_01_2021

COVID_02_2021 <- read_crowdtangle(
  "2022-08-06-11-22-51-CEST-search-csv-export.csv"
)
COVID_02_2021

COVID_03_2021 <- read_crowdtangle(
  "2022-08-06-11-22-52-CEST-search-csv-export.csv"
)
COVID_03_2021

COVID_04_2021 <- read_crowdtangle(
  "2022-08-06-11-23-02-CEST-search-csv-export.csv"
)
COVID_04_2021

COVID_05_2021 <- read_crowdtangle(
  "2022-07-19-15-02-19-CEST-search-csv-export.csv"
)
COVID_05_2021

COVID_06_2021 <- read_crowdtangle(
  "2022-07-19-15-25-58-CEST-search-csv-export.csv"
)
COVID_06_2021

COVID_07_2021 <- read_crowdtangle(
  "2022-07-19-15-30-33-CEST-search-csv-export.csv"
)
COVID_07_2021

COVID_08_2021 <- read_crowdtangle(
  "2022-07-19-15-38-36-CEST-search-csv-export.csv"
)
COVID_08_2021

COVID_09_2021 <- read_crowdtangle(
  "2022-07-19-15-48-00-CEST-search-csv-export.csv"
)
COVID_09_2021

COVID_10_2021 <- read_crowdtangle(
  "2022-07-19-15-55-33-CEST-search-csv-export.csv"
)
COVID_10_2021

COVID_11_2021 <- read_crowdtangle(
  "2022-07-19-15-57-12-CEST-search-csv-export.csv"
)
COVID_11_2021

COVID_12_2021 <- read_crowdtangle(
  "2022-07-19-16-09-30-CEST-search-csv-export.csv"
)
COVID_12_2021

COVID_01_2022 <- read_crowdtangle(
  "2022-07-19-16-12-20-CEST-search-csv-export.csv"
)
COVID_01_2022


# calculating means for reactions and total number of posts, by Facebook.Id
# adding the new columns to the original dataset
# (the month column is added separately, after this function is applied)

# Summarise the posts of one data set per Facebook.Id and attach that summary
# to every post of the same account.
#
# x: data frame of posts with the columns Facebook.Id, Total.Interactions,
#    Likes, Comments, Shares, Love, Wow, Haha, Sad, Angry and Care.
# Returns: the per-account summary (count = number of rows for the account,
#    <column>.mean = mean of that column with missing values ignored),
#    left-joined to x on Facebook.Id; the summary columns therefore come
#    first, followed by the remaining columns of x.
function_means <- function(x) {
  per_account <- x %>%
    group_by(Facebook.Id) %>%
    summarise(
      count = n(),
      across(
        c(Total.Interactions, Likes, Comments, Shares, Love,
          Wow, Haha, Sad, Angry, Care),
        function(values) mean(values, na.rm = TRUE),
        .names = "{.col}.mean"
      )
    )

  left_join(per_account, x, by = "Facebook.Id")
}

# For every month: attach the per-account means to the posts, then label the
# rows with the month ("YYYY-MM") they belong to.

COVID_02_2020_new <- COVID_02_2020 %>%
  function_means() %>%
  mutate(Month = "2020-02")

COVID_03_2020_new <- COVID_03_2020 %>%
  function_means() %>%
  mutate(Month = "2020-03")

COVID_04_2020_new <- COVID_04_2020 %>%
  function_means() %>%
  mutate(Month = "2020-04")

COVID_05_2020_new <- COVID_05_2020 %>%
  function_means() %>%
  mutate(Month = "2020-05")

COVID_06_2020_new <- COVID_06_2020 %>%
  function_means() %>%
  mutate(Month = "2020-06")

COVID_07_2020_new <- COVID_07_2020 %>%
  function_means() %>%
  mutate(Month = "2020-07")

COVID_08_2020_new <- COVID_08_2020 %>%
  function_means() %>%
  mutate(Month = "2020-08")

COVID_09_2020_new <- COVID_09_2020 %>%
  function_means() %>%
  mutate(Month = "2020-09")

COVID_10_2020_new <- COVID_10_2020 %>%
  function_means() %>%
  mutate(Month = "2020-10")

COVID_11_2020_new <- COVID_11_2020 %>%
  function_means() %>%
  mutate(Month = "2020-11")

COVID_12_2020_new <- COVID_12_2020 %>%
  function_means() %>%
  mutate(Month = "2020-12")

COVID_01_2021_new <- COVID_01_2021 %>%
  function_means() %>%
  mutate(Month = "2021-01")

COVID_02_2021_new <- COVID_02_2021 %>%
  function_means() %>%
  mutate(Month = "2021-02")

COVID_03_2021_new <- COVID_03_2021 %>%
  function_means() %>%
  mutate(Month = "2021-03")

COVID_04_2021_new <- COVID_04_2021 %>%
  function_means() %>%
  mutate(Month = "2021-04")

COVID_05_2021_new <- COVID_05_2021 %>%
  function_means() %>%
  mutate(Month = "2021-05")

COVID_06_2021_new <- COVID_06_2021 %>%
  function_means() %>%
  mutate(Month = "2021-06")

COVID_07_2021_new <- COVID_07_2021 %>%
  function_means() %>%
  mutate(Month = "2021-07")

COVID_08_2021_new <- COVID_08_2021 %>%
  function_means() %>%
  mutate(Month = "2021-08")

COVID_09_2021_new <- COVID_09_2021 %>%
  function_means() %>%
  mutate(Month = "2021-09")

COVID_10_2021_new <- COVID_10_2021 %>%
  function_means() %>%
  mutate(Month = "2021-10")

COVID_11_2021_new <- COVID_11_2021 %>%
  function_means() %>%
  mutate(Month = "2021-11")

COVID_12_2021_new <- COVID_12_2021 %>%
  function_means() %>%
  mutate(Month = "2021-12")

COVID_01_2022_new <- COVID_01_2022 %>%
  function_means() %>%
  mutate(Month = "2022-01")

# identifying 8 pro-prevention (cyan) and 8 anti-prevention (coral) Facebook
# IDs in a plot;
# x = number of posts per Facebook ID, y = mean of total interactions, per
# Facebook ID

# Manual colour for each highlighted account, keyed by User.Name:
# coral = anti-prevention, cyan = pro-prevention. Accounts that are not named
# here receive the scale's na.value colour (grey by default in ggplot2).
account_colours <- c(
  "LuisLazarusZeusTV" = "coral", "george.simion.unire" = "coral",
  "OanaLov369" = "coral", "dradinaalberts" = "coral",
  "DianaSosoacaOficial" = "coral", "razvan.constantinescu1" = "coral",
  "gheorghe.piperea" = "coral", "IosefinaIosefinaPascal" = "coral",
  "drvasiradulescu" = "cyan", "Digi24" = "cyan",
  "doctormihail" = "cyan", "MinisterulSanatatii" = "cyan",
  "klausiohannis" = "cyan", "ROVaccinare" = "cyan",
  "ProTVRomania" = "cyan", "ObservatorAntena1" = "cyan"
)

# Scatter plot of posting volume against mean total interactions for one
# month, coloured by account.
#
# data: a data frame with the columns count, Total.Interactions.mean and
#   User.Name (as returned by function_means()).
# title: plot title, here the month being shown.
# colours: named colour vector passed to scale_color_manual().
# Returns: the ggplot object (printed automatically when called at top level).
plot_account_engagement <- function(data, title, colours = account_colours) {
  ggplot(
    data,
    aes(x = count, y = Total.Interactions.mean, color = User.Name)
  ) +
    geom_point(size = 3) +
    scale_color_manual(values = colours) +
    labs(
      x = "Number of posts per Facebook ID",
      y = "Mean of total interactions per Facebook ID",
      colour = "Anti and pro-prevention Facebook IDs",
      title = title
    )
}

plot_account_engagement(COVID_03_2020_new, "March 2020")

plot_account_engagement(COVID_09_2020_new, "September 2020")

plot_account_engagement(COVID_03_2021_new, "March 2021")

plot_account_engagement(COVID_09_2021_new, "September 2021")

plot_account_engagement(COVID_04_2021_new, "April 2021")

# for the statistical analysis

# The 16 sampled accounts (User.Name values), split by stance. These two
# vectors are the single source for both the row filter and the Position
# label, so an account cannot be filtered in without also being labelled.
antivax_accounts <- c(
  "LuisLazarusZeusTV", "george.simion.unire", "OanaLov369",
  "dradinaalberts", "DianaSosoacaOficial", "razvan.constantinescu1",
  "IosefinaIosefinaPascal", "gheorghe.piperea"
)

provax_accounts <- c(
  "drvasiradulescu", "Digi24", "doctormihail",
  "MinisterulSanatatii", "klausiohannis", "ROVaccinare", "ProTVRomania",
  "ObservatorAntena1"
)

# Kept under its original name for backward compatibility: all sampled
# accounts in one vector.
sample <- c(antivax_accounts, provax_accounts)

# Reduce one month of post-level data to the distinct per-account summary
# rows of the sampled accounts.
#
# x: data frame carrying User.Name, Facebook.Id, Month, count and the
#    *.mean columns (i.e. the output of function_means() plus Month).
# Returns: the selected summary columns for sampled accounts only, with a
#    Position column ("provax" or "antivax") and duplicate rows removed.
function_stats <- function(x) {
  x %>%
    filter(User.Name %in% c(antivax_accounts, provax_accounts)) %>%
    select(
      Facebook.Id, User.Name, Month, count, Total.Interactions.mean,
      Likes.mean, Comments.mean, Shares.mean, Love.mean,
      Wow.mean, Haha.mean, Sad.mean, Angry.mean, Care.mean
    ) %>%
    mutate(
      Position = case_when(
        User.Name %in% provax_accounts ~ "provax",
        User.Name %in% antivax_accounts ~ "antivax"
      )
    ) %>%
    # the summary values are repeated on every post of an account; keep each
    # distinct row once
    distinct()
}

# One reduced frame per month: summary rows of the sampled accounts only.
COVID_02_2020_new_stats <- COVID_02_2020_new %>% function_stats()
COVID_03_2020_new_stats <- COVID_03_2020_new %>% function_stats()
COVID_04_2020_new_stats <- COVID_04_2020_new %>% function_stats()
COVID_05_2020_new_stats <- COVID_05_2020_new %>% function_stats()
COVID_06_2020_new_stats <- COVID_06_2020_new %>% function_stats()
COVID_07_2020_new_stats <- COVID_07_2020_new %>% function_stats()
COVID_08_2020_new_stats <- COVID_08_2020_new %>% function_stats()
COVID_09_2020_new_stats <- COVID_09_2020_new %>% function_stats()
COVID_10_2020_new_stats <- COVID_10_2020_new %>% function_stats()
COVID_11_2020_new_stats <- COVID_11_2020_new %>% function_stats()
COVID_12_2020_new_stats <- COVID_12_2020_new %>% function_stats()
COVID_01_2021_new_stats <- COVID_01_2021_new %>% function_stats()
COVID_02_2021_new_stats <- COVID_02_2021_new %>% function_stats()
COVID_03_2021_new_stats <- COVID_03_2021_new %>% function_stats()
COVID_04_2021_new_stats <- COVID_04_2021_new %>% function_stats()
COVID_05_2021_new_stats <- COVID_05_2021_new %>% function_stats()
COVID_06_2021_new_stats <- COVID_06_2021_new %>% function_stats()
COVID_07_2021_new_stats <- COVID_07_2021_new %>% function_stats()
COVID_08_2021_new_stats <- COVID_08_2021_new %>% function_stats()
COVID_09_2021_new_stats <- COVID_09_2021_new %>% function_stats()
COVID_10_2021_new_stats <- COVID_10_2021_new %>% function_stats()
COVID_11_2021_new_stats <- COVID_11_2021_new %>% function_stats()
COVID_12_2021_new_stats <- COVID_12_2021_new %>% function_stats()
COVID_01_2022_new_stats <- COVID_01_2022_new %>% function_stats()


# stacking the monthly data frames prepared for the statistical analysis,
# in chronological order, into one data frame
# (used below to visualise the differences among reactions to antivax and
# provax accounts)

COVID_stats_merged <- rbind(
  COVID_02_2020_new_stats,
  COVID_03_2020_new_stats,
  COVID_04_2020_new_stats,
  COVID_05_2020_new_stats,
  COVID_06_2020_new_stats,
  COVID_07_2020_new_stats,
  COVID_08_2020_new_stats,
  COVID_09_2020_new_stats,
  COVID_10_2020_new_stats,
  COVID_11_2020_new_stats,
  COVID_12_2020_new_stats,
  COVID_01_2021_new_stats,
  COVID_02_2021_new_stats,
  COVID_03_2021_new_stats,
  COVID_04_2021_new_stats,
  COVID_05_2021_new_stats,
  COVID_06_2021_new_stats,
  COVID_07_2021_new_stats,
  COVID_08_2021_new_stats,
  COVID_09_2021_new_stats,
  COVID_10_2021_new_stats,
  COVID_11_2021_new_stats,
  COVID_12_2021_new_stats,
  COVID_01_2022_new_stats
)

# Monthly scatter plot of one per-account mean, with one facet row per
# Position.
#
# data: data frame with the columns Month, Position and the column named by
#   y_var.
# y_var: name (string) of the column to put on the y axis.
# y_label: y-axis label.
# title: plot title.
# Returns: the ggplot object (printed automatically when called at top level).
plot_monthly_reaction <- function(data, y_var, y_label, title) {
  # .data[[y_var]] looks the column up by its string name inside aes()
  ggplot(data, aes(x = Month, y = .data[[y_var]], color = Position)) +
    geom_point(size = 3) +
    facet_wrap(~ Position, nrow = 2) +
    labs(
      x = "Year-Month",
      y = y_label,
      colour = "Anti and pro-prevention Facebook IDs",
      title = title
    ) +
    # slanted month labels so the 24 ticks do not overlap
    theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))
}

plot_monthly_reaction(
  COVID_stats_merged, "Total.Interactions.mean",
  y_label = "Mean of total interactions per Facebook ID",
  title = "Total interactions per Facebook ID"
)

plot_monthly_reaction(
  COVID_stats_merged, "Haha.mean",
  y_label = "Mean of Haha reactions per Facebook ID",
  title = "Haha reactions per Facebook ID"
)

plot_monthly_reaction(
  COVID_stats_merged, "Angry.mean",
  y_label = "Mean of angry reactions per Facebook ID",
  title = "Angry reactions per Facebook ID"
)

plot_monthly_reaction(
  COVID_stats_merged, "Likes.mean",
  y_label = "Mean of like reactions per Facebook ID",
  title = "Like reactions per Facebook ID"
)

# boxplot visualization
# finding outliers

# Boxplots of one per-account mean by month, filled by Position.
#
# data: data frame with the columns Month, Position and the column named by
#   y_var.
# y_var: name (string) of the column to put on the y axis.
# y_label: y-axis label.
# title: plot title.
# Returns: the ggplot object (printed automatically when called at top level).
plot_reaction_boxplot <- function(data, y_var, y_label, title) {
  ggplot(data) +
    aes(x = Month, y = .data[[y_var]], fill = Position) +
    geom_boxplot() +
    labs(
      x = "Year-Month",
      y = y_label,
      title = title
    ) +
    theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))
}

# Rows of `data` whose value in `column` lies more than 1.5 * IQR below the
# first quartile or above the third quartile of that column.
#
# data: data frame.
# column: name (string) of a numeric column of data.
# Returns: the outlying rows of data, in their original order.
#
# The quartiles are computed over all rows of data (all months and both
# positions pooled), not per boxplot group.
# NOTE(review): the fences therefore differ from the per-month whiskers drawn
# by plot_reaction_boxplot() - confirm that pooled fences are intended.
find_iqr_outliers <- function(data, column) {
  values <- data[[column]]

  # na.rm = TRUE: a missing mean must not abort the quartile computation;
  # rows with a missing value are never reported as outliers
  lower_quartile <- quantile(values, 0.25, na.rm = TRUE)
  upper_quartile <- quantile(values, 0.75, na.rm = TRUE)
  spread <- IQR(values, na.rm = TRUE)

  is_outlier <- values < (lower_quartile - 1.5 * spread) |
    values > (upper_quartile + 1.5 * spread)

  data[which(is_outlier), ]
}

plot_reaction_boxplot(
  COVID_stats_merged, "Angry.mean",
  y_label = "Mean of angry reactions per Facebook ID",
  title = "Angry reactions per Facebook ID"
)

outliers_angry <- find_iqr_outliers(COVID_stats_merged, "Angry.mean")
dim(outliers_angry)


plot_reaction_boxplot(
  COVID_stats_merged, "Haha.mean",
  y_label = "Mean of haha reactions per Facebook ID",
  title = "Haha reactions per Facebook ID"
)

outliers_haha <- find_iqr_outliers(COVID_stats_merged, "Haha.mean")
dim(outliers_haha)


plot_reaction_boxplot(
  COVID_stats_merged, "Total.Interactions.mean",
  y_label = "Mean of total interactions per Facebook ID",
  title = "Total interactions per Facebook ID"
)

outliers_Total.Interactions <- find_iqr_outliers(
  COVID_stats_merged, "Total.Interactions.mean"
)
dim(outliers_Total.Interactions)


plot_reaction_boxplot(
  COVID_stats_merged, "Likes.mean",
  y_label = "Mean of Like reactions per Facebook ID",
  title = "Like reactions per Facebook ID"
)

outliers_likes <- find_iqr_outliers(COVID_stats_merged, "Likes.mean")
dim(outliers_likes)



# exporting data on outliers: one .xlsx workbook per outlier table, written
# to the working directory
library("writexl")

# target file name -> data frame to write
outlier_exports <- list(
  "outliers_haha.xlsx" = outliers_haha,
  "outliers_angry.xlsx" = outliers_angry,
  "outliers_Total.Interactions.xlsx" = outliers_Total.Interactions,
  "outliers_Likes.xlsx" = outliers_likes
)

for (export_path in names(outlier_exports)) {
  write_xlsx(outlier_exports[[export_path]], export_path)
}


# descriptive statistics
library(psych)

describeBy(COVID_stats_merged, COVID_stats_merged$Position)

# skewness & kurtosis do not indicate normal distributions

# using the log to bring the distributions closer to normal
# log10(0) evaluates to -Inf; those values are replaced with 0

# log10 of a numeric vector, with -Inf (the result for an input of 0)
# replaced by 0. Missing values stay missing.
#
# NOTE(review): after this replacement a mean of 0 and a mean of 1 both map
# to 0, and means between 0 and 1 map to negative values below it. A
# log10(x + 1) transform would avoid that - confirm which is intended before
# changing, since it alters the ANOVA results.
log10_zero_as_zero <- function(values) {
  logged <- log10(values)
  replace(logged, is.infinite(logged) & logged < 0, 0)
}

# The replacement is applied to the four new log columns only, using a numeric
# test, instead of comparing every column of the data frame to the string
# '-Inf'.
COVID_stats_merged_log <- COVID_stats_merged %>%
  mutate(
    log.Total.Interactions.mean = log10_zero_as_zero(Total.Interactions.mean),
    log.Haha.mean = log10_zero_as_zero(Haha.mean),
    log.Angry.mean = log10_zero_as_zero(Angry.mean),
    log.Likes.mean = log10_zero_as_zero(Likes.mean)
  )

describeBy(COVID_stats_merged_log, COVID_stats_merged_log$Position)

# ANOVA analysis on type of reaction and groups (by month and by position)
# anova() on a single lm fit tests the terms sequentially, in formula order
# (Position first, then Month)

anova(lm(log.Total.Interactions.mean ~ Position + Month,
         data = COVID_stats_merged_log))
anova(lm(log.Haha.mean ~ Position + Month,
         data = COVID_stats_merged_log))
anova(lm(log.Angry.mean ~ Position + Month,
         data = COVID_stats_merged_log))
anova(lm(log.Likes.mean ~ Position + Month,
         data = COVID_stats_merged_log))


# total monthly interactions for COVID-related posts, for the whole database
# (number of infections and number of fatalities in Romania - WHO data - are
# handled in the section that follows)

# Full monthly data sets keyed by month ("YYYY-MM"), in chronological order.
monthly_posts <- list(
  "2020-02" = COVID_02_2020, "2020-03" = COVID_03_2020,
  "2020-04" = COVID_04_2020, "2020-05" = COVID_05_2020,
  "2020-06" = COVID_06_2020, "2020-07" = COVID_07_2020,
  "2020-08" = COVID_08_2020, "2020-09" = COVID_09_2020,
  "2020-10" = COVID_10_2020, "2020-11" = COVID_11_2020,
  "2020-12" = COVID_12_2020, "2021-01" = COVID_01_2021,
  "2021-02" = COVID_02_2021, "2021-03" = COVID_03_2021,
  "2021-04" = COVID_04_2021, "2021-05" = COVID_05_2021,
  "2021-06" = COVID_06_2021, "2021-07" = COVID_07_2021,
  "2021-08" = COVID_08_2021, "2021-09" = COVID_09_2021,
  "2021-10" = COVID_10_2021, "2021-11" = COVID_11_2021,
  "2021-12" = COVID_12_2021, "2022-01" = COVID_01_2022
)

# Sum of Total.Interactions over all posts of each month. Missing values are
# ignored, consistent with the means computed in function_means(); without
# na.rm a single missing value would turn the whole month's total into NA.
# USE.NAMES = FALSE keeps the vector unnamed so that data.frame() below does
# not pick the months up as row names.
Total.interactions <- vapply(
  monthly_posts,
  function(posts) sum(posts$Total.Interactions, na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)

Months <- names(monthly_posts)

COVID_TI <- data.frame(Months, Total.interactions)
head(COVID_TI)


# group = 1: Months is a character column, so the points have to be put in a
# single group for geom_line() to connect them
ggplot(COVID_TI) +
  aes(x = Months, y = Total.interactions, group = 1) +
  geom_line() + geom_point() +
  labs(
    x = "Year-Month",
    y = "Total interactions",
    title = "Total monthly interactions for COVID-related posts"
  ) +
  theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))

# for comparison between the whole dataset and the sample
# NOTE(review): the first figure is a sum of interactions, the second a sum of
# per-account post counts - confirm these are the two quantities to compare.
sum(COVID_TI$Total.interactions)
sum(COVID_stats_merged$count)
write_xlsx(COVID_stats_merged, "COVID_stats_merged.xlsx")


# importing the WHO dataset on COVID (illness & mortality data, downloaded
# 20-08-2022)
# (the object names keep their original "WTO" spelling)

COVID_WTO_ill <- read_csv("WHO-COVID-19-global-data.csv")


# keeping only the rows reported for Romania

COVID_WTO_ill_RO <- filter(COVID_WTO_ill, Country == "Romania")

# new deaths per reporting date
ggplot(
  COVID_WTO_ill_RO,
  aes(x = Date_reported, y = New_deaths, group = 1)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Date",
    y = "New deaths reported",
    title = "The evolution of COVID mortality in Romania, 2020-2022"
  ) +
  theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))



# new cases per reporting date
ggplot(
  COVID_WTO_ill_RO,
  aes(x = Date_reported, y = New_cases, group = 1)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Date",
    y = "New COVID cases",
    title = "The evolution of COVID cases in Romania, 2020-2022"
  ) +
  theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))


# Printing the session details (R version, attached packages) and the
# citation entries for R itself and the packages used, for the references
sessionInfo()
citation(package = "tidyverse")
citation(package = "psych")
citation()
citation(package = "writexl")
